# ==============================================================================
# file name: 03-identify-repeated-participation-LU.sql
# date: Mar 12, 2024
# author: Bernhard Clemm 
# purpose: identify attempts at repeated participation (Lucid)
# THIS SCRIPT REQUIRES ACCESS TO THE RAW DATA AND SERVES FOR REFERENCE ONLY
# ==============================================================================

SELECT 
v.person_id, v.wave, v.created_utc, p.duration_s_5_na, v.same_as_prev_url, v.url,
CASE 
WHEN (v.url_host LIKE '%questionpro.com' AND REGEXP_LIKE(v.url, '/a/TakeSurvey\?tt=[a-zA-Z0-9]')) 
THEN CONCAT(v.url_protocol, '//', v.url_host, v.url_path, '?', REGEXP_EXTRACT(v.url_query, '.+?(?=\&)'))
ELSE CONCAT(v.url_protocol, '//', v.url_host, v.url_path) 
END AS url_quest
FROM 
(SELECT *, CASE WHEN url = url_previous THEN 1 ELSE 0 END same_as_prev_url -- indicator for whether visit happening directly after each other.
FROM
(SELECT id, person_id, wave, created_utc, url, url_protocol, url_host, url_path, url_query,
LAG(url) OVER(PARTITION BY person_id ORDER BY created_utc) url_previous
FROM visits_clean_url
where country = 'US'
ORDER BY person_id, created_utc)) v
LEFT JOIN visits_professionals p on p.id = v.id
WHERE 
(v.url_host LIKE '%confirmit.com' AND REGEXP_LIKE(v.url_path, '^/wix/[a-zA-Z0-9]')) OR 
(v.url_host LIKE '%surveygizmo.com' AND REGEXP_LIKE(v.url_path, '^/s3/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%surveygizmo.eu' AND REGEXP_LIKE(v.url_path, '^/s3/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%surveymonkey.com' AND REGEXP_LIKE(v.url_path, '^/r/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%qualtrics.com' AND REGEXP_LIKE(v.url_path, '^/jfe/form/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%survey.cmix.com' AND REGEXP_LIKE(v.url_path, '^/[A-Z0-9]')) OR
(v.url_host LIKE '%questionpro.com' AND REGEXP_LIKE(v.url_path, '/t/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%questionpro.com' AND REGEXP_LIKE(v.url, '/a/TakeSurvey\?tt=[a-zA-Z0-9]')) OR
(v.url_host LIKE '%formsite.com' AND REGEXP_LIKE(v.url_path, '[a-zA-Z0-9]/index\.html$')) OR 
(v.url_host LIKE '%unipark.de' AND REGEXP_LIKE(v.url_path, '^/uc/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%typeform.com' AND REGEXP_LIKE(v.url_path, '^/to/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%formstack.com' AND REGEXP_LIKE(v.url_path, '^/forms/(?![a-zA-Z0-9]*index\.php)')) OR
(v.url_host LIKE '%zohopublic.com' AND REGEXP_LIKE(v.url_path, '^/zs/[a-zA-Z0-9]')) OR
(v.url_host LIKE '%zohopublic.eu' AND REGEXP_LIKE(v.url_path, '^/zs/[a-zA-Z0-9]'))

-- exported as repeated_participation_LU.csv (not not contained in reproduction repository as raw data)